Participants completed one diet (Keto or American Heart Association - AHA), did a wash-out, then switched to the other diet. We wanted to see if the diet readout reflects this.


Make output directory

if (!dir.exists(params$save_dir)) { system(paste("mkdir", params$save_dir)) }

Load packages

pacman::p_load("magrittr", "ggpubr", "mixOmics", "santaR", "tidyverse")

Read in files

ont <- read_delim(params$ont_file, delim = "\t") %>%
  filter(!duplicated(sample_name)) # two food samples have the same name "mackerel_gonad_13"
meta <- read_csv(params$meta_file)
# get files matching string provided
f_counts_files <- list.files(params$f_counts_dir, pattern = params$f_counts_name)
for (f in f_counts_files) {
  assign(gsub("\\.csv", "", f), read.csv(file.path(params$f_counts_dir, f), row.names = 1))
}

Format data

ont <- set_rownames(as(ont, "matrix"), ont$sample_name)
meta <- set_rownames(as.data.frame(meta), meta$filename)

Transform food counts to proportions

transform_f_counts <- function(fc) {
  filenames <- row.names(fc)
  fc <- fc %>%
    apply(., 1, function(x) {ifelse(x != 0, x/sum(x), 0)}) %>%
    t() %>%
    data.frame() %>%
    mutate(filename = filenames) %>%
    left_join(meta, by = "filename")
  return(fc)
}

for (f in grep("^BEAM_.*food_counts", ls(), value = TRUE)) {
  transf <- transform_f_counts(get(f))
  assign(paste0("prop_", f), transf)
}

Transform food counts using centered log-ratio (CLR)

for (f in grep("^BEAM_.*food_counts", ls(), value = TRUE)) {
  transf <- logratio.transfo(get(f), logratio = "CLR", offset = 1) %>%
    as("matrix") %>%
    data.frame() %>%
    rownames_to_column(var = "filename") %>%
    left_join(meta, by = "filename")
  assign(paste0("clr_", f), transf)
}

Can we see longitudinal changes in diet profiles?

Time series analysis on individual foods using spline-fitting method implemented in santaR

# get levels used from environment
levs <- grep("food_counts_L", ls(), value = TRUE) %>%
  gsub(".*food_counts_", "", .) %>%
  unique()
for (transf in c("prop", "clr")) { # analyze both transformations
  for (lev in levs) { # and all levels provided
    for (biosp in c("fecal", "serum")) { # and both biospecimen types
      df <- get(paste0(transf, "_BEAM_", biosp, "_food_counts_", lev))
      # keep patients with at least 4 time points
      keep <- names(which(table(df$SubjectID) > 3))
      df <- df %>%
        filter(First.Diet %in% c("1", "2"),
               SubjectID %in% keep)
      foods <- colnames(get(paste0("BEAM_", biosp, "_food_counts_", lev)))
      sp <- santaR_auto_fit(inputData = df[,foods], ind = df[["SubjectID"]], 
                            time = as.numeric(df[["Study_TP"]]), group = df[["First.Diet"]], 
                            df = 4)
      assign(paste("sp", transf, lev, biosp, sep = "_"), sp)
    }
  }
}

Plot mean spline for each group for each food

plot_spline <- function(sp, savename) {
  foods <- names(sp)
  # get BH-corrected pvalues
  pvals <- santaR_auto_summary(sp)$pval.all %>%
    mutate(food = row.names(.))
  for (f in 1:length(foods)) {
    food <- foods[f]
    pval <- round(pvals$dist_BH[pvals$food == food], digits = 4)
    # plot group mean splines with confidence intervals
    p <- santaR_plot(sp[[food]], 
                     showIndPoint=FALSE,
                     showIndCurve=FALSE,
                     xlab = "Time point", 
                     ylab = ifelse(grepl("clr", savename), "CLR(food count)", "Food abundance (%)"), 
                     title = paste(food, paste0("(FDR=", pval, ")")),
                     colorVect = c("orange", "dodgerblue")) +
      scale_x_continuous(labels = c("1" = "1: Pre-diet 1", "2" = "2: Diet 1", "3" = "3: Post-diet 1/Pre-diet 2", 
                                    "4" = "4: Diet 2", "5" = "5: Post-diet 2")) +
      rotate_x_text(angle = 45) +
      theme(axis.text = element_text(size = 5),
            panel.grid.major = element_blank(), 
            panel.grid.minor = element_blank(), 
            title = element_text(size = 7)) 
    ggsave(file.path(params$save_dir, paste0("BEAM_time_series_plot_", savename, "_", food, ".pdf")), 
           p, width = 4, height = 3)
    # if p<0.05, show plot in notebook
    if (pvals$dist[pvals$food == food]<0.05) { print(p) }
  }
}

Plot will be displayed if P<0.05, even if not significant after adjusting for multiple hypothesis tests. Group 1 started with Keto diet, Group 2 started with AHA diet.

for (sp in grep("sp_", ls(), value = TRUE)) {
  obj <- get(sp)
  suppressMessages(plot_spline(obj, gsub("sp_", "", sp)))
}

Session Info

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1   stringr_1.4.0   dplyr_1.0.5     purrr_0.3.4    
##  [5] readr_1.4.0     tidyr_1.1.3     tibble_3.1.0    tidyverse_1.3.0
##  [9] santaR_1.0      mixOmics_6.14.1 lattice_0.20-41 MASS_7.3-53.1  
## [13] ggpubr_0.4.0    ggplot2_3.3.3   magrittr_2.0.1 
## 
## loaded via a namespace (and not attached):
##  [1] fs_1.5.0            matrixStats_0.58.0  lubridate_1.7.10   
##  [4] doParallel_1.0.16   RColorBrewer_1.1-2  httr_1.4.2         
##  [7] tools_4.0.3         backports_1.2.1     bslib_0.2.4        
## [10] utf8_1.2.1          R6_2.5.0            DBI_1.1.1          
## [13] colorspace_2.0-0    withr_2.4.1         tidyselect_1.1.0   
## [16] gridExtra_2.3       curl_4.3            compiler_4.0.3     
## [19] cli_2.3.1           rvest_1.0.0         pacman_0.5.1       
## [22] xml2_1.3.2          labeling_0.4.2      sass_0.3.1         
## [25] scales_1.1.1        digest_0.6.27       foreign_0.8-81     
## [28] rmarkdown_2.7       rio_0.5.26          pkgconfig_2.0.3    
## [31] htmltools_0.5.1.1   highr_0.8           dbplyr_2.1.0       
## [34] fastmap_1.1.0       rlang_0.4.10        readxl_1.3.1       
## [37] rstudioapi_0.13     shiny_1.6.0         farver_2.1.0       
## [40] jquerylib_0.1.3     generics_0.1.0      jsonlite_1.7.2     
## [43] BiocParallel_1.24.1 zip_2.1.1           car_3.0-10         
## [46] Matrix_1.3-2        Rcpp_1.0.6          munsell_0.5.0      
## [49] fansi_0.4.2         abind_1.4-5         lifecycle_1.0.0    
## [52] stringi_1.5.3       yaml_2.2.1          carData_3.0-4      
## [55] plyr_1.8.6          grid_4.0.3          parallel_4.0.3     
## [58] promises_1.2.0.1    ggrepel_0.9.1       crayon_1.4.1       
## [61] haven_2.3.1         hms_1.0.0           knitr_1.31         
## [64] pillar_1.5.1        igraph_1.2.6        ggsignif_0.6.1     
## [67] corpcor_1.6.9       reshape2_1.4.4      codetools_0.2-18   
## [70] reprex_1.0.0        glue_1.4.2          evaluate_0.14      
## [73] modelr_0.1.8        data.table_1.14.0   vctrs_0.3.7        
## [76] httpuv_1.5.5        foreach_1.5.1       cellranger_1.1.0   
## [79] gtable_0.3.0        assertthat_0.2.1    xfun_0.22          
## [82] openxlsx_4.2.3      mime_0.10           xtable_1.8-4       
## [85] broom_0.7.5         RSpectra_0.16-0     rstatix_0.7.0      
## [88] later_1.1.0.1       rARPACK_0.11-0      shinythemes_1.2.0  
## [91] iterators_1.0.13    ellipse_0.4.2       ellipsis_0.3.1